% List of all Lingvo publications, grouped by topic and listed in
% chronological order, with the newest papers at the bottom of each group.

% Common conferences.
@string{acl         = {Proc. Annual Meeting of the Association for Computational Linguistics ({ACL})}}
@string{emnlp       = {Proc. Conference on Empirical Methods in Natural Language Processing ({EMNLP})}}
@string{icassp      = {Proc. {IEEE} International Conference on Acoustics, Speech, and Signal Processing ({ICASSP})}}
@string{iclr        = {Proc. International Conference on Learning Representations ({ICLR})}}
@string{icml        = {Proc. International Conference on Machine Learning ({ICML})}}
@string{interspeech = {Proc. Interspeech}}
@string{nips        = {Advances in Neural Information Processing Systems}}
@string{slt         = {Proc. {IEEE} Spoken Language Technology Workshop ({SLT})}}

% Topic annotations, used by update_publications.sh to update the markdown index.
@string{nmt = {Translation}}
@string{tts = {Speech synthesis}}
@string{asr = {Speech recognition}}
@string{st  = {Speech-to-text translation}}
@string{nlu = {Language understanding}}

%%%%%%%%%%%%%%%%%%%%
% Translation papers.

@inproceedings{chen2018best,
  annote    = nmt,
  author    = {M. X. Chen and O. Firat and A. Bapna and M. Johnson and W. Macherey and G. Foster and L. Jones and M. Schuster and N. Shazeer and N. Parmar and A. Vaswani and J. Uszkoreit and {\L}. Kaiser and Z. Chen and Y. Wu and M. Hughes},
  title     = {{The Best of Both Worlds: Combining Recent Advances in Neural Machine Translation}},
  booktitle = acl,
  year      = {2018},
  pdf       = {https://arxiv.org/abs/1804.09849},
}

@inproceedings{cherry2018revisiting,
  annote    = nmt,
  author    = {C. Cherry and G. Foster and A. Bapna and O. Firat and W. Macherey},
  title     = {Revisiting Character-Based Neural Machine Translation with Capacity and Compression},
  booktitle = emnlp,
  year      = {2018},
  pdf       = {https://arxiv.org/abs/1808.09943},
}

@inproceedings{bapna2018training,
  annote    = nmt,
  author    = {A. Bapna and M. X. Chen and O. Firat and Y. Cao and Y. Wu},
  title     = {Training Deeper Neural Machine Translation Models with Transparent Attention},
  booktitle = emnlp,
  year      = {2018},
  pdf       = {https://arxiv.org/abs/1808.07561},
}

@techreport{wu2016google,
  annote      = nmt,
  author      = {Y. Wu and M. Schuster and Z. Chen and Q. V. Le and M. Norouzi and W. Macherey and M. Krikun and Y. Cao and Q. Gao and K. Macherey and J. Klingner and A. Shah and M. Johnson and X. Liu and {\L}. Kaiser and S. Gouws and Y. Kato and T. Kudo and H. Kazawa and K. Stevens and G. Kurian and N. Patil and W. Wang and C. Young and J. Smith and J. Riesa and A. Rudnick and O. Vinyals and G. Corrado and M. Hughes and J. Dean},
  title       = {{Google}'s Neural Machine Translation System: Bridging the Gap between Human and Machine Translation},
  institution = {Google},
  year        = {2016},
  pdf         = {https://arxiv.org/abs/1609.08144},
}


%%%%%%%%%%%%%%%%%%%%
% Speech recognition papers.

@inproceedings{chiu2018state,
  annote    = asr,
  author    = {C.-C. Chiu and T. N. Sainath and Y. Wu and R. Prabhavalkar and P. Nguyen and Z. Chen and A. Kannan and R. J. Weiss and K. Rao and K. Gonina and N. Jaitly and B. Li and J. Chorowski and M. Bacchiani},
  title     = {State-of-the-art Speech Recognition With Sequence-to-Sequence Models},
  booktitle = icassp,
  year      = {2018},
  pdf       = {https://arxiv.org/abs/1712.01769},
}

@inproceedings{toshniwal2018multilingual,
  annote    = asr,
  author    = {S. Toshniwal and T. N. Sainath and R. J. Weiss and B. Li and P. Moreno and E. Weinstein and K. Rao},
  title     = {Multilingual Speech Recognition With A Single End-To-End Model},
  booktitle = icassp,
  year      = {2018},
  pdf       = {https://arxiv.org/abs/1711.01694},
}

@inproceedings{li2018multidialect,
  annote    = asr,
  author    = {B. Li and T. N. Sainath and K. Sim and M. Bacchiani and E. Weinstein and P. Nguyen and Z. Chen and Y. Wu and K. Rao},
  title     = {{Multi-Dialect Speech Recognition With a Single Sequence-to-Sequence Model}},
  booktitle = icassp,
  year      = {2018},
  pdf       = {https://arxiv.org/abs/1712.01541},
}

@inproceedings{sainath2018no,
  annote    = asr,
  author    = {T. N. Sainath and R. Prabhavalkar and S. Kumar and S. Lee and A. Kannan and D. Rybach and V. Schogol and P. Nguyen and B. Li and Y. Wu and Z. Chen and C. C. Chiu},
  title     = {{No Need for a Lexicon? Evaluating the Value of the Pronunciation Lexica in End-to-End Models}},
  booktitle = icassp,
  year      = {2018},
  pdf       = {https://arxiv.org/abs/1712.01864},
}

@inproceedings{lawson2018learning,
  annote    = asr,
  author    = {D. Lawson and C. C. Chiu and G. Tucker and C. Raffel and K. Swersky and N. Jaitly},
  title     = {{Learning hard alignments with variational inference}},
  booktitle = icassp,
  year      = {2018},
  pdf       = {https://arxiv.org/abs/1705.05524},
}

@inproceedings{kannan2018analysis,
  annote    = asr,
  author    = {A. Kannan and Y. Wu and P. Nguyen and T. N. Sainath and Z. Chen and R. Prabhavalkar},
  title     = {An Analysis of Incorporating an External Language Model into a Sequence-to-Sequence Model},
  booktitle = icassp,
  year      = {2018},
  pdf       = {https://arxiv.org/abs/1712.01996},
}

@inproceedings{prabhavalkar2018minimum,
  annote    = asr,
  author    = {R. Prabhavalkar and T. N. Sainath and Y. Wu and P. Nguyen and Z. Chen and C. C. Chiu and A. Kannan},
  title     = {{Minimum Word Error Rate Training for Attention-based Sequence-to-sequence Models}},
  booktitle = icassp,
  year      = {2018},
  pdf       = {https://arxiv.org/abs/1712.01818},
}

@inproceedings{sainath2018improving,
  annote    = asr,
  author    = {T. N. Sainath and C. C. Chiu and R. Prabhavalkar and A. Kannan and Y. Wu and P. Nguyen and Z. Chen},
  title     = {{Improving the Performance of Online Neural Transducer Models}},
  booktitle = icassp,
  year      = {2018},
  pdf       = {https://arxiv.org/abs/1712.01807},
}

@inproceedings{chiu2018monotonic,
  annote    = asr,
  author    = {C. C. Chiu and C. Raffel},
  title     = {{Monotonic Chunkwise Attention}},
  booktitle = iclr,
  year      = {2018},
  pdf       = {https://arxiv.org/abs/1712.05382},
}

@inproceedings{williams2018contextual,
  annote    = asr,
  author    = {I. Williams and A. Kannan and P. Aleksic and D. Rybach and T. N. Sainath},
  title     = {{Contextual Speech Recognition in End-to-End Neural Network Systems using Beam Search}},
  booktitle = interspeech,
  year      = {2018},
  pdf       = {https://www.isca-speech.org/archive/Interspeech_2018/pdfs/2416.pdf},
}

% NOTE(review): the key reads "chui" although the first author is Chiu; it is
% kept unchanged so that existing citations of this key keep resolving.
@inproceedings{chui2018speech,
  annote    = asr,
  author    = {C. C. Chiu and A. Tripathi and K. Chou and C. Co and N. Jaitly and D. Jaunzeikare and A. Kannan and P. Nguyen and H. Sak and A. Sankar and J. Tansuwan and N. Wan and Y. Wu and X. Zhang},
  title     = {{Speech recognition for medical conversations}},
  booktitle = interspeech,
  year      = {2018},
  pdf       = {https://arxiv.org/abs/1711.07274},
}

@inproceedings{pang2018compression,
  annote    = asr,
  author    = {R. Pang and T. N. Sainath and R. Prabhavalkar and S. Gupta and Y. Wu and S. Zhang and C. C. Chiu},
  title     = {{Compression of End-to-End Models}},
  booktitle = interspeech,
  year      = {2018},
  pdf       = {https://www.isca-speech.org/archive/Interspeech_2018/pdfs/1025.pdf},
}

@inproceedings{toshniwal2018comparison,
  annote    = asr,
  author    = {S. Toshniwal and A. Kannan and C. C. Chiu and Y. Wu and T. N. Sainath and K. Livescu},
  title     = {A comparison of techniques for language model integration in encoder-decoder speech recognition},
  booktitle = slt,
  year      = {2018},
  pdf       = {https://arxiv.org/pdf/1807.10857.pdf},
}

@inproceedings{pundak2018deep,
  annote    = asr,
  author    = {G. Pundak and T. N. Sainath and R. Prabhavalkar and A. Kannan and D. Zhao},
  title     = {Deep Context: End-to-End Contextual Speech Recognition},
  booktitle = slt,
  year      = {2018},
  pdf       = {https://arxiv.org/pdf/1808.02480.pdf},
}

@inproceedings{li2019bytes,
  annote    = asr,
  author    = {B. Li and Y. Zhang and T. N. Sainath and Y. Wu and W. Chan},
  title     = {Bytes are All You Need: End-to-End Multilingual Speech Recognition and Synthesis with Bytes},
  booktitle = icassp,
  year      = {2019},
  pdf       = {https://arxiv.org/abs/1811.09021},
}

@inproceedings{guo2019spelling,
  annote    = asr,
  author    = {J. Guo and T. N. Sainath and R. J. Weiss},
  title     = {A Spelling Correction Model for End-to-End Speech Recognition},
  booktitle = icassp,
  year      = {2019},
  pdf       = {https://arxiv.org/abs/1902.07178},
}

%%%%%%%%%%%%%%%%%%%%
% Language understanding papers.

@inproceedings{kannan2018semi,
  annote    = nlu,
  author    = {A. Kannan and K. Chen and D. Jaunzeikare and A. Rajkomar},
  title     = {{Semi-Supervised Learning for Information Extraction from Dialogue}},
  booktitle = interspeech,
  year      = {2018},
  pdf       = {https://www.isca-speech.org/archive/Interspeech_2018/pdfs/1318.pdf},
}

@inproceedings{yavuz2018calcs,
  annote    = nlu,
  author    = {S. Yavuz and C. C. Chiu and P. Nguyen and Y. Wu},
  title     = {{CaLcs: Continuously Approximating Longest Common Subsequence for Sequence Level Optimization}},
  booktitle = emnlp,
  year      = {2018},
  pdf       = {http://aclweb.org/anthology/D18-1406},
}

@inproceedings{haghani2018s2p,
  annote    = nlu,
  author    = {P. Haghani and A. Narayanan and M. Bacchiani and G. Chuang and N. Gaur and P. Moreno and R. Prabhavalkar and Z. Qu and A. Waters},
  title     = {{From Audio to Semantics: Approaches to End-to-End Spoken Language Understanding}},
  booktitle = slt,
  year      = {2018},
  pdf       = {https://arxiv.org/pdf/1809.09190.pdf},
}


%%%%%%%%%%%%%%%%%%%%
% Speech synthesis papers.

@inproceedings{shen2018natural,
  annote         = tts,
  author         = {J. Shen and R. Pang and R. J. Weiss and M. Schuster and N. Jaitly and Z. Yang and Z. Chen and Y. Zhang and Y. Wang and R. J. Skerry-Ryan and R. A. Saurous and Y. Agiomyrgiannakis and Y. Wu},
  title          = {Natural {TTS} Synthesis by Conditioning {WaveNet} on Mel Spectrogram Predictions},
  booktitle      = icassp,
  year           = {2018},
  pdf            = {https://arxiv.org/abs/1712.05884},
  sound_examples = {https://google.github.io/tacotron/publications/tacotron2/index.html},
}

@inproceedings{chorowski2018styletransfer,
  annote         = tts,
  author         = {J. Chorowski and R. J. Weiss and R. A. Saurous and S. Bengio},
  title          = {On Using Backpropagation for Speech Texture Generation and Voice Conversion},
  booktitle      = icassp,
  year           = {2018},
  pdf            = {https://arxiv.org/abs/1712.08363},
  sound_examples = {https://google.github.io/speech_style_transfer/samples.html},
}

@inproceedings{jia2018multispeaker,
  annote         = tts,
  author         = {Y. Jia and Y. Zhang and R. J. Weiss and Q. Wang and J. Shen and F. Ren and Z. Chen and P. Nguyen and R. Pang and I. Lopez-Moreno and Y. Wu},
  title          = {Transfer Learning from Speaker Verification to Multispeaker Text-To-Speech Synthesis},
  booktitle      = nips,
  year           = {2018},
  pdf            = {https://arxiv.org/abs/1806.04558},
  sound_examples = {https://google.github.io/tacotron/publications/speaker_adaptation/index.html},
}

@inproceedings{hsu2019hierarchical,
  annote         = tts,
  author         = {W. N. Hsu and Y. Zhang and R. J. Weiss and H. Zen and Y. Wu and Y. Wang and Y. Cao and Y. Jia and Z. Chen and J. Shen and P. Nguyen and R. Pang},
  title          = {Hierarchical Generative Modeling for Controllable Speech Synthesis},
  booktitle      = iclr,
  year           = {2019},
  pdf            = {https://arxiv.org/abs/1810.07217},
  sound_examples = {https://google.github.io/tacotron/publications/gmvae_controllable_tts/index.html},
}

@inproceedings{hsu2018disentangling,
  annote    = tts,
  author    = {W. N. Hsu and Y. Zhang and R. J. Weiss and Y. A. Chung and Y. Wang and Y. Wu and J. Glass},
  title     = {Disentangling Correlated Speaker and Noise for Speech Synthesis via Data Augmentation and Adversarial Factorization},
  booktitle = {NeurIPS 2018 Workshop on Interpretability and Robustness in Audio, Speech, and Language},
  year      = {2018},
  pdf       = {https://openreview.net/forum?id=Bkg9ZeBB37},
}


%%%%%%%%%%%%%%%%%%%%
% Speech-to-text translation papers.

@inproceedings{weiss2017sequence,
  annote    = st,
  author    = {R. J. Weiss and J. Chorowski and N. Jaitly and Y. Wu and Z. Chen},
  title     = {Sequence-to-Sequence Models Can Directly Translate Foreign Speech},
  booktitle = interspeech,
  year      = {2017},
  pdf       = {https://arxiv.org/abs/1703.08581},
}

@inproceedings{jia2019leveraging,
  annote    = st,
  author    = {Y. Jia and M. Johnson and W. Macherey and R. J. Weiss and Y. Cao and C. C. Chiu and N. Ari and S. Laurenzo and Y. Wu},
  title     = {Leveraging Weakly Supervised Data to Improve End-to-End Speech-to-Text Translation},
  booktitle = icassp,
  year      = {2019},
  pdf       = {https://arxiv.org/abs/1811.02050},
}
